﻿using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Elderos.Utils;

namespace Elderos.Highlight.Search
{
    /// <summary>
    /// Splits text into a flat, ordered sequence of <see cref="Token"/> instances
    /// using a single precompiled regular expression.
    /// </summary>
    public static class Tokenizer
    {
        // A token is either a maximal run of word characters or exactly one
        // non-word character (whitespace and punctuation included). Since every
        // character is matched by one of the two alternatives, no part of the
        // input is skipped between consecutive tokens.
        // NOTE(review): 'ё' is listed explicitly although .NET's Unicode-aware \w
        // should already cover it - confirm before simplifying; the pattern is
        // deliberately left unchanged here.
        private static readonly Regex TokenizerRegex = new Regex(@"(?:[\wё]+|[\W])", RegexOptions.Compiled);

        /// <summary>
        /// Tokenizes <paramref name="text"/> into word runs and single non-word characters.
        /// </summary>
        /// <param name="text">The text to split. Must not be <c>null</c>.</param>
        /// <returns>
        /// One <see cref="Token"/> per regex match, in order of appearance, each constructed
        /// from the matched substring and its starting index within <paramref name="text"/>.
        /// An empty input yields an empty array.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="text"/> is <c>null</c>.
        /// </exception>
        public static Token[] GetTokens(string text)
        {
            // Fail up front with the caller's parameter name; otherwise the same
            // exception type surfaces from Regex.Matches naming its own "input" parameter.
            if (text == null)
            {
                throw new System.ArgumentNullException("text");
            }

            // Project matches straight into the result array; no intermediate list is needed.
            // Cast<Match>() is kept because MatchCollection is only a non-generic
            // IEnumerable on older target frameworks.
            return TokenizerRegex
                .Matches(text)
                .Cast<Match>()
                .Select(match => new Token(match.Value, match.Index))
                .ToArray();
        }
    }
}
